# 需要爬取的地址： http://www.tianqihoubao.com/lishi/shenzhen/month/202412.html
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import re
import pandas as pd

# Page to scrape and where the parsed table is written.
URL = "http://www.tianqihoubao.com/lishi/shenzhen/month/202412.html"
OUTPUT_CSV = 'weather.csv'

# Output schema, in CSV column order.
COLUMNS = [
    'datetime',
    'day_weather',
    'night_weather',
    'temperature_low',
    'temperature_top',
    'day_wind_direction',  # daytime wind direction
    'night_wind_direction',  # night-time wind direction
    'day_wind_min_power',  # daytime minimum wind force
    'day_wind_max_power',  # daytime maximum wind force
    'night_wind_min_power',  # night-time minimum wind force
    'night_wind_max_power',  # night-time maximum wind force
]

# Compiled once; every table row is matched against the same four patterns.
_DATE_RE = re.compile(r"(\d+)年(\d+)月(\d+)日")
_WEATHER_RE = re.compile(r"(\S+) /(\S+)")
_TEMPERATURE_RE = re.compile(r"(-?\d+)℃ / (-?\d+)℃")
_WIND_RE = re.compile(r"(\S+) (\d+)-(\d+)级 /(\S+) (\d+)-(\d+)级")


def parse_weather_row(cell_texts):
    """Parse the cell texts of one table row into a record.

    ``cell_texts`` is a sequence of strings whose first four items are the
    date, weather, temperature and wind cells, for example::

        ["2024年12月01日", "晴 /晴", "24℃ / 15℃", "北风 1-3级 /北风 1-3级"]

    Returns a list ordered like ``COLUMNS``, or ``None`` when the row has
    fewer than four cells or any cell does not match its expected format.
    """
    if len(cell_texts) < 4:
        return None

    date = _DATE_RE.search(cell_texts[0])
    weather = _WEATHER_RE.search(cell_texts[1])
    temperature = _TEMPERATURE_RE.search(cell_texts[2])
    winds = _WIND_RE.search(cell_texts[3])
    if not (date and weather and temperature and winds):
        return None

    year, month, day = (int(part) for part in date.groups())
    # The sample row lists the high first ("24℃ / 15℃"); min/max keeps the
    # low/top columns correct regardless of the order on the page.
    first_temp, second_temp = (int(value) for value in temperature.groups())
    day_dir, day_min, day_max, night_dir, night_min, night_max = winds.groups()

    return [
        # Zero-pad so the result is always an unambiguous YYYYMMDD string.
        f"{year:04d}{month:02d}{day:02d}",
        weather.group(1),
        weather.group(2),
        min(first_temp, second_temp),
        max(first_temp, second_temp),
        day_dir,
        night_dir,
        int(day_min),
        int(day_max),
        int(night_min),
        int(night_max),
    ]


def scrape_month(url=URL, output_path=OUTPUT_CSV):
    """Scrape the weather table at ``url`` and write it to ``output_path``.

    Opens the page in Edge, reads the text of every data row of the
    ``#content table``, parses the rows with ``parse_weather_row`` and saves
    the result as a UTF-8 CSV without an index column. Rows that cannot be
    parsed are reported on stdout and skipped.

    Returns the resulting DataFrame.
    """
    options = webdriver.EdgeOptions()
    driver = webdriver.Edge(options=options)  # launch the Edge browser
    try:
        driver.get(url)  # open the weather page
        table_rows = driver.find_elements(By.CSS_SELECTOR, "#content table tr")
        # Pull the plain text out while the browser session is still alive;
        # the first row is skipped, as in the table header.
        rows_as_text = [
            [td.text for td in tr.find_elements(By.CSS_SELECTOR, "td")[:4]]
            for tr in table_rows[1:]
        ]
    finally:
        # Always shut the browser and driver process down, even on errors.
        driver.quit()

    records = []
    for cell_texts in rows_as_text:
        record = parse_weather_row(cell_texts)
        if record is None:
            print(f"Skipping unparseable row: {cell_texts!r}")
            continue
        records.append(record)

    # Build the frame in one go instead of growing it row by row.
    frame = pd.DataFrame(records, columns=COLUMNS)
    frame.to_csv(output_path, index=False, encoding='utf-8')
    return frame


if __name__ == "__main__":
    df = scrape_month()
